import os
import re

def count_chinese_chars(text):
    """Return how many CJK Unified Ideographs (U+4E00-U+9FFF) occur in *text*.

    Only characters inside that single code-point range are counted; CJK
    punctuation and ideographs outside the range are ignored.
    """
    # Walk the matches lazily instead of materialising a list just to
    # measure its length.
    return sum(1 for _ in re.finditer('[\u4e00-\u9fff]', text))

def _read_text_with_fallback(file_path):
    """Decode *file_path* as text, trying UTF-8 and then legacy encodings.

    The file is read once as raw bytes and decoded in memory, so a failed
    guess never re-opens the file.

    Returns:
        A ``(content, encoding)`` tuple, where ``encoding`` names the codec
        that decoded the data, or ``(None, None)`` when every candidate
        fails.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'rb') as f:
        raw = f.read()

    # Order matters: the first codec that decodes without error wins.
    for encoding in ('utf-8', 'gbk', 'gb2312', 'big5'):
        try:
            return raw.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    return None, None


def count_chinese_in_directory(directory_path):
    """Count the Chinese characters in every ``.txt`` file under a directory.

    The directory tree is walked recursively. Each ``.txt`` file is decoded
    as UTF-8, falling back to GBK, GB2312 and Big5 in that order. A per-file
    line and a final summary are printed to standard output.

    Files that cannot be read (``OSError``) or that decode with none of the
    candidate encodings are reported and skipped; they do not abort the scan
    and are not included in the processed-file count.

    Args:
        directory_path: Root directory to scan.

    Returns:
        The total number of Chinese characters found in all processed files.
    """
    total_chinese_chars = 0
    file_count = 0

    for root, _dirs, files in os.walk(directory_path):
        for file_name in files:
            # Only .txt files are of interest.
            if not file_name.endswith('.txt'):
                continue

            file_path = os.path.join(root, file_name)
            try:
                content, encoding = _read_text_with_fallback(file_path)
            except OSError as exc:
                # One unreadable file (permissions, vanished mid-walk, ...)
                # must not abort the whole scan.
                print(f"无法读取文件 {file_path} ({exc})，已跳过")
                continue

            if encoding is None:
                print(f"无法解码文件 {file_path}，已跳过")
                continue

            chars_count = count_chinese_chars(content)
            total_chinese_chars += chars_count
            file_count += 1

            # The encoding is only mentioned when a fallback codec was needed.
            if encoding == 'utf-8':
                print(f"文件 {file_path} 中包含 {chars_count} 个汉字")
            else:
                print(f"文件 {file_path} (编码: {encoding}) 中包含 {chars_count} 个汉字")

    print(f"\n统计完成！共处理了 {file_count} 个txt文件")
    print(f"所有txt文件中共包含 {total_chinese_chars} 个汉字")

    return total_chinese_chars

if __name__ == "__main__":
    # Ask the user which directory to scan.
    directory = input("请输入要统计的目录路径: ")

    # Reject anything that is not an existing directory before scanning.
    if not os.path.isdir(directory):
        print("目录不存在，请检查路径是否正确")
    else:
        count_chinese_in_directory(directory)
